#!/bin/sh

# Summarise a tab-separated classification table (presumably PhymmBL output,
# going by the variable name -- TODO confirm) per phylum and genus.
#
# Usage: <this script> TABLE
#
# Column 12 of TABLE is read as the phylum and column 4 as the genus.
# The following files are written to the current directory:
#   unique.phyla.list       distinct phyla, in order of first appearance
#   <phylum>.genus.list     genera seen in that phylum, one per line
#   <phylum>.genus.counts   "<genus><TAB><number of rows>" for that phylum
#
# Exit status: 0 on success, 2 on a usage error, 1 on any other failure.

# Print a short usage message to stderr.
usage() {
    printf 'Usage: %s <tab-separated table>\n' "${0##*/}" >&2
}

# list_phyla TABLE
# Print every distinct, non-empty value of column 12 in first-seen order.
# Values containing "PHYLUM" are treated as header text and skipped.
list_phyla() {
    # Reading via redirection keeps odd file names ("-x", "a=b") from being
    # interpreted by awk as options or variable assignments.
    awk '
        BEGIN { FS = "\t" }
        $12 != "" && $12 !~ /PHYLUM/ && !seen[$12]++ { print $12 }
    ' < "$1"
}

# genus_counts PHYLUM TABLE
# Print "<genus><TAB><count>" for every genus (column 4) found on rows whose
# column 12 is exactly PHYLUM, sorted by genus. A "Candidatus " prefix is
# removed before counting, and rows with an empty genus are ignored.
genus_counts() {
    awk -v phylum="$1" '
        BEGIN { FS = "\t"; OFS = "\t" }
        # Appending "" forces a string comparison; matching the whole field
        # avoids hits on other columns (e.g. an order that merely starts
        # with the same letters as a genus).
        ($12 "") == (phylum "") {
            genus = $4
            gsub(/Candidatus /, "", genus)
            if (genus != "") count[genus]++
        }
        END { for (genus in count) print genus, count[genus] }
    ' < "$2" | sort -t "$(printf '\t')" -k 1,1
}

# main TABLE
# Validate the argument, then write the phyla list and the per-phylum
# genus list / genus count files described at the top of this file.
main() {
    if [ "$#" -lt 1 ] || [ -z "${1:-}" ]; then
        usage
        return 2
    fi
    if [ ! -r "$1" ]; then
        printf '%s: cannot read input table: %s\n' "${0##*/}" "$1" >&2
        return 1
    fi
    phymmbl=$1

    list_phyla "$phymmbl" > unique.phyla.list || return 1

    # read -r keeps each phylum intact even if it contains blanks or
    # backslashes; a final line without a newline is still processed.
    while IFS= read -r phylum || [ -n "$phylum" ]; do
        genus_counts "$phylum" "$phymmbl" > "$phylum.genus.counts" || return 1
        # The genus list is simply the first column of the counts table,
        # so the two files can never get out of step.
        cut -f 1 < "$phylum.genus.counts" > "$phylum.genus.list" || return 1
        printf 'Done with %s\n' "$phylum"
    done < unique.phyla.list
}

main "$@"
